clear

cap program drop superclean_basic
include "codes/auxiliary_programs/superclean_basic.ado"

*-----------------------------------------------*
* 1. 2012 Census data at the municipality level *
*-----------------------------------------------*
* Sheet POBLACION
import excel "data/inputs/1_Bolivia_censo_poblacion_2012.xls", ///
	   clear sheet("POBLACIÓN")

keep A O E L U-Z
rename A mpio
rename E poblacion
rename O alfabetismo
rename L rural
rename V ed_prim
rename W ed_sec

drop U X Y Z

gen depto=upper(mpio)
replace depto="" if mpio!=depto
replace depto=depto[_n-1] if depto=="" & depto[_n-1]!=""
drop if mpio==depto
gen prov = mpio if poblacion == ""
replace prov = prov[_n-1] if prov == "" & depto == depto[_n-1]
drop if poblacion == ""

drop in 1/1
compress
tempfile education
save `education'

* Sheet POBREZA
import excel using "data/inputs/1_Bolivia_censo_poblacion_2012.xls", ///
	clear sheet("POBREZA")

keep A C
rename A mpio
rename C pobreza

gen depto=upper(mpio)
replace depto="" if mpio!=depto
replace depto=depto[_n-1] if depto=="" & depto[_n-1]!=""
drop if mpio==depto
gen prov = mpio if pobreza == ""
replace prov = prov[_n-1] if prov == "" & depto == depto[_n-1]
drop if pobreza == ""

compress
tempfile poverty
save `poverty'

* Sheet CARACTERÍSTICAS ECONÓMICAS
import excel using "data/inputs/1_Bolivia_censo_poblacion_2012.xls", ///
	clear sheet("CARACTERÍSTICAS ECONÓMICAS")

keep A L
rename A mpio
rename L agricultural_labor

gen depto=upper(mpio)
replace depto="" if mpio!=depto
replace depto=depto[_n-1] if depto=="" & depto[_n-1]!=""
drop if mpio==depto
gen prov = mpio if agricultural_labor == ""
replace prov = prov[_n-1] if prov == "" & depto == depto[_n-1]
drop if agricultural_labor == ""

drop in 1/1
compress
tempfile ag_labor
save `ag_labor'

* Merge
use `education', clear
merge 1:1 depto prov mpio using `poverty'
drop _m
replace prov = "Burnet Oconnor" if regexm(prov, "Burnet")
merge 1:1 depto prov mpio using `ag_labor'
drop _m

*Organize
destring poblacion-agricultural_labor, replace

superclean_basic depto
superclean_basic prov
superclean_basic mpio

order depto prov mpio
sort depto prov mpio
compress
tempfile census2012_municipal
save `census2012_municipal'

*------------------------------------------------------------------*
* 2. Coordinates of Municipalities Centroides from ArcGIS 		   *
*------------------------------------------------------------------*
insheet using "data/inputs/1_Bolivia_coordinates.txt", clear
keep adm1_es adm2_es adm3_es coord*

superclean_basic adm*
rename adm1 depto
rename adm2 prov
rename adm3 mpio

do "codes/auxiliary_programs/2_data_names_coordinates.do"

tempfile coordinates
save `coordinates'

*-------------------*
* 3. Student's Data *
*-------------------*
insheet using "data/inputs/1_Bolivia_data_from_student.csv", clear

keep nomdep nomprov nombremunicipio asientoelectoral nombrerecinto density
collapse (mean) density, by(nomdep nomprov nombremunicipio asientoelectoral nombrerecinto)

superclean_basic nomdep nomprov nombremunicipio asientoelectoral nombrerecinto

rename nomdep depto
rename nomprov prov
rename nombremunicipio mpio
rename asientoelectoral loc
rename nombrerecinto reci

drop if reci == ""

do "codes/auxiliary_programs/2_data_names_studentdata.do"

compress 
tempfile students_data
save `students_data'

*--------------------*
* 4. 2016 data		 *
*--------------------*
do "codes/additional_code/merge_2016_2019.do"

rename NúmeroMesa NumMesa

compress
tempfile merge_2019_2016
save `merge_2019_2016'

*-------------------------------------------------------------------*
* 4. Create data set of covariates with numeric ID from main data   *
*-------------------------------------------------------------------*
* Keep identifiers from 2019 electoral data	
use "data/outputs/computo_ulttranstrepdate_missingsatmunmedian.dta", clear
keep Pais Dep Prov Muni Loc Reci NumMesa
duplicates drop

* Merge 2016 electoral data
merge 1:1 NumMesa using `merge_2019_2016'
drop _merge

* Clean up strings in order to merge on names
gen str Reci2 = regexs(1) if regexm(Reci, "^(.+)-")
gen str Reci3 = regexs(1) if regexm(Reci2, "^(.+)-")
gen reci = Reci3 if Reci3 != ""
replace reci = Reci2 if Reci2 != "" & Reci3 == ""
replace reci = Reci if reci == ""
drop Reci2 Reci3 

superclean_basic Dep Prov Muni Loc reci

rename Dep depto 
rename Prov prov
rename Muni mpio
rename Loc loc 
rename Reci Reci_Original

replace reci = subinstr(reci,"ue ","",.)
replace reci = subinstr(reci,"u e ","",.)
replace reci = trim(reci)
replace reci = itrim(reci)
replace loc = subinstr(loc, `"""', "", .)
compress

* Merge student's data (recincto level)
merge m:1 depto prov mpio loc reci using `students_data'
	/* NOTE: Of 33,048 mesas in the electoral data IN BOLIVIA,
	         32,997 are matched to the student's data, while
			 51 are not matched. The other unmatched mesas
			 are outside of Bolivia and excluded from the 
			 analysis that includes covariates.*/
su _m if _m == 2
assert `r(N)' == 4			 
drop if _m == 2			 
drop _m

* Merge municipio centroid coordinates (municipio level)
merge m:1 depto prov mpio using `coordinates'
su _m if _m == 2
assert `r(N)' == 2 /* NOTE, these two do not have mpio names and cannot be merged */
drop if _m == 2
drop _m

* Merge census data (municipio level)
do "codes/auxiliary_programs/2_data_municipio_names.do"
merge m:1 depto prov mpio using `census2012_municipal' 
su _m if _m == 2
assert `r(N)' == 0
drop _m

*-------------------------------------------------------------------*
* 5. Constructing control variables								    *
*-------------------------------------------------------------------*

* Replacing some missing values manually
*---------------------------------------
* Population
replace poblacion = 14589 if ///
	depto == "La Paz" & prov == "Camacho" & mpio == "Puerto Mayor de Carabuco"

replace poblacion = 10748 if ///
	depto == "Chuquisaca" & prov == "Luis Calvo" & ///
	mpio == "Villa Vaca Guzmán" //Municipality also known as Muyupampa

replace poblacion = 7344 if ///
	depto == "Cochabamba" & prov == "Mizque" & ///
	mpio == "Raqaypampa" /*  Indigenous territory, doesn't appear in the Census. 
							 Population extracted from: 
							 https://www.raqaypampa.gob.bo/territorio-y-division-politico-administrativa/	
							 */
							 
replace poblacion = 28922 if ///
	depto == "Santa Cruz" & prov == "Chiquitos" & mpio == "San José"	

* Distance to capital of department
rename coordx longitud
rename coordy latitud
replace latitud = -18.189008 if ///
	depto == "Cochabamba" & prov == "Mizque" & mpio == "Raqaypampa"
replace longitud=-65.381503 if ///
	depto == "Cochabamba" & prov == "Mizque" & mpio == "Raqaypampa"

* Agricultural Labor
replace agricultural_labor = 74 if ///
	depto == "La Paz" & prov == "Camacho" & mpio == "Puerto Mayor de Carabuco"

replace agricultural_labor = 55.8 if ///
	depto == "Chuquisaca" & prov == "Luis Calvo" & mpio == "Villa Vaca Guzmán" //Municipality also known as Muyupampa

replace agricultural_labor = 39.3 if ///
	depto == "Santa Cruz" & prov == "Chiquitos" & mpio == "San José"	

* Census covariates
gen analfabetismo = 100 - alfabetismo
gen logpop = log(poblacion)

* Generating a dummy for capital of department
gen capital_depto=0 if Pais=="Bolivia"
replace capital_depto=1 if (depto=="beni" & mpio=="trinidad") | ///
	(depto=="chuquisaca" & mpio=="sucre") | ///
	(depto=="cochabamba" & mpio=="cochabamba") | ///
	(depto=="la paz" & mpio=="la paz") | ///
	(depto=="oruro" & mpio=="oruro") | ///
	(depto=="pando" & mpio=="cobija") | ///
	(depto=="potosi" & mpio=="potosi") | ///
	(depto=="santa cruz" & mpio=="santa cruz de la sierra") | ///
	(depto=="tarija" & mpio=="tarija")

* Minimum distance to (any) capital of department
preserve
	keep if Pais=="Bolivia"
	keep capital_depto latitud longitud depto mpio
	keep if capital_depto == 1
	duplicates drop
	keep latitud longitud
	gen t = _n
	gen t2 = 1
	reshape wide latitud longitud, i(t2) j(t)

	tempfile capitals
	save `capitals'
restore

gen t2 = 1
merge m:1 t2 using `capitals'
drop _merge t2

forvalues k = 1/9 {
	geodist latitud longitud latitud`k' longitud`k', generate(dist`k')
	drop latitud`k' longitud`k'
}
egen dist_capital = rowmin(dist1 - dist9)
drop dist1 - dist9
drop lat* long*

* Media luna
gen medialuna = (depto == "pando" | depto == "beni" | depto == "santa cruz" | ///
	depto == "tarija")

* Taking the log of some covariates
gen ln_aglabor = ln(agricultural_labor)
gen ln_density = ln(density)
gen ln_dist_capital = ln(1 + dist_capital)

* 2016 
	gen mshare2016=si2016/(si2016+no2016)
	gen cshare2016=1-mshare2016
	gen margin2016=mshare2016-cshare2016
	
* Drop some variables that we don't use
drop no2016-dr2016
drop loc CódigoMesa recinto2016

* Saving
compress
drop Pais depto prov mpio Reci_Original reci

*--
